#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Time    :   2021/08/12 20:00:18
@Author  :   Leo Wood 
@Contact :   leowood@foxmail.com
'''


import json
from pyMySql import pySql
import os
import sys
os.chdir(sys.path[0])
from tqdm import tqdm
import pandas as pd


with open('db.json', 'r', encoding='utf-8') as f:
    db_info = json.load(f)
db_info = db_info['Medical_Knowledge']
db_server = pySql(ip=db_info['ip'], user=db_info['user'], pwd=db_info['pwd'], db=db_info['db'])

class_dict = {'身体':'1','疾病':'2','药物':'3','治疗':'4','器材':'5','症状':'6'}

# term_dict = {'身体':[],'疾病':[],'药物':[],'治疗':[],'器材':[]}

# count = 1000
# for root,_,file_names in os.walk('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/selected_txt_from_txt-bank'):
#     for file_name in tqdm(file_names):
#         label = file_name.split('_')[0]
#         if label in class_dict.keys():
#             label_num = class_dict[label]
#             with open(os.path.join(root,file_name),'r') as f:
#                 for line in f.readlines():
#                     line = line.strip()
#                     if line:
#                         if line not in term_dict[label]:
#                             count += 1
#                             term_dict[label].append(line)
#                             line = line.replace("'","''") ## 转义字符
#                             sql = "insert into Categorized_terms(termId,termText,categoryId) values ({},'{}',{})".format(count,line,label_num)
#                             db_server.write_sql(sql)
# db_server.close()


# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/selected_txt_from_txt-bank/clean_data.json','w') as f:
#     json.dump(term_dict,f)



### 补充ding收集的词表

# id = 108996
# count = 0

# import pandas as pd
# df = pd.read_excel('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/entities_sougou_ding.xlsx')

# label_dict = {'人体组织':'身体', '医学仪器':'器材', '手术':'治疗', '疾病':'疾病', '穴位':'身体', '药物':'药物'}
# import json

# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/selected_txt_from_txt-bank/clean_data.json','r') as f:
#     term_dict=json.load(f)

# count = 0
# for key,value in label_dict.items():
#     label_words = df.loc[df['category'] == key]['term'].tolist()
#     label_num = class_dict[value]
#     for word in label_words:
#         word = word.strip()
#         if word:
#             if word not in term_dict[value]:
#                 count += 1
#                 id += 1
#                 term_dict[value].append(word)
#                 sql = "insert into Categorized_terms(termId,termText,categoryId) values ({},'{}',{})".format(id,word,label_num)
#                 db_server.write_sql(sql)
#     print(key,' Done')


# db_server.close()   

## 修缮词表


# class_dict = {'身体部位':1,'疾病':2,'药物':3,'治疗方法':4,'器材及设备':5,'症状':6,'检查':7}

# ## 获取最新termdict
# term_dict = {}

# for label,num in class_dict.items():
#     term_dict[label] = []
#     sql = "select termText from Categorized_terms where categoryId={}".format(num)
#     results = db_server.read_sql(sql)
#     label_words = [result[0] for result in results]
#     term_dict[label] = label_words


# ## 加入症状类别

# id = 119658

# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/selected_txt_from_txt-bank/症状_1中医症候.txt','r') as f:
#     for line in f.readlines():
#         word = line.strip()
#         if word not in term_dict['症状']:
#             term_dict['症状'].append(word)
#             id += 1
#             sql = "insert into Categorized_terms(termId,termText,categoryId) values ({},'{}',{})".format(id,word,6)
#             db_server.write_sql(sql)

# print("症状Done")

# # 插入检查


# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/selected_txt_from_txt-bank/检查_1_医学检验.txt','r') as f:
#     jiancha = [line.strip() for line in f.readlines()]

# for word in jiancha:
#     if word not in term_dict['检查']:
#         term_dict['检查'].append(word)
#         id += 1
#         sql = "insert into Categorized_terms(termId,termText,categoryId) values ({},'{}',{})".format(id,word,7)
#         db_server.write_sql(sql)
# db_server.close()  
# print("检查Done")

# ## 插入ding中检查和症状

# df = pd.read_excel('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/entities_sougou_ding.xlsx')

# for key in ['症状']:
#     label_words = df.loc[df['category'] == key]['term'].tolist()
#     for word in label_words:
#         word = word.strip()
#         if word:
#             if word not in term_dict['症状']:
#                 id += 1
#                 term_dict['症状'].append(word)
#                 sql = "insert into Categorized_terms(termId,termText,categoryId) values ({},'{}',{})".format(id,word,6)
#                 db_server.write_sql(sql)
#     print(key,' Done')

# for key in ['检查']:
#     label_words = df.loc[df['category'] == key]['term'].tolist()
#     for word in label_words:
#         word = word.strip()
#         if word:
#             if word not in term_dict['检查']:
#                 id += 1
#                 term_dict['检查'].append(word)
#                 sql = "insert into Categorized_terms(termId,termText,categoryId) values ({},'{}',{})".format(id,word,7)
#                 db_server.write_sql(sql)
#     print(key,' Done')


# db_server.close()   
# exit()

### 插入MedicalKG 数据

id = 119797

class_dict = {'身体部位':1,'疾病':2,'药物':3,'治疗方法':4,'器材及设备':5,'症状':6,'检查':7}

## 获取最新termdict
term_dict = {}

for label,num in class_dict.items():
    term_dict[label] = []
    sql = "select termText from Categorized_terms where categoryId={}".format(num)
    results = db_server.read_sql(sql)
    label_words = [result[0] for result in results]
    term_dict[label] = label_words

kg_dict = {'症状':'症状', '疾病':'疾病', '部位':'身体部位', '检查':'检查', '治疗':'治疗方法'}

## 读取medicalKG
medicalKG = {}
with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/old_files/medical_terms/MedicalKG.txt','r') as f:
    for line in f.readlines():
        line = line.strip()
        if line:
            key = line.split()[2]
            value = line.split()[0]
            if key not in medicalKG:
                medicalKG[key] = []
                medicalKG[key].append(value)
            else:
                medicalKG[key].append(value)

for key,value in tqdm(medicalKG.items()):
    for word in value:
        if word not in term_dict[kg_dict[key]]:
            id += 1
            sql = "insert into Categorized_terms(termId,termText,categoryId) values ({},'{}',{})".format(id,word,class_dict[kg_dict[key]])
            db_server.write_sql(sql)


db_server.close()   






# import json
# with open('/Users/leo/OneDrive/Phd/实验数据/医学知识库收集/sougou_wordlist/selected_txt_from_txt-bank/clean_data.json','r') as f:
#     term_dict=json.load(f)

# label_dict = {'人体组织':'身体', '医学仪器':'器材', '手术':'治疗', '疾病':'疾病', '穴位':'身体', '药物':'药物'}


if __name__ == '__main__':
    pass